In this study we will analyse economic data collected by the Worldbank. For this we will collect variables available through the Worldbank API using the R package WDI, which wraps the API.
To simplify the data we will aggregate it by regions as defined by the Worldbank, which you can see below. We also decided to use the same color scheme for all plots, Tableau 10, which you can see below as well.
library(tmap)
# Interactive map of the World Bank regions, coloured with the palette that
# is shared by all plots of this report.
#
# Functions from ggplot2, ggthemes, dplyr and countrycode are called with an
# explicit namespace (and without the pipe) because, in this file, those
# packages are only attached further down; tmap is attached just above.

# NOTE(review): `mappings` is not referenced anywhere else in the visible
# part of this file; kept in case another section relies on it.
mappings <- ggplot2::map_data("world")

# The ten colours of the "Tableau 10" palette.
pal <- ggthemes::tableau_color_pal(palette = "Tableau 10")(10)

# Country polygons shipped with tmap, without Antarctica.
data("World")
World <- dplyr::filter(World, continent != "Antarctica")

# Translate ISO3 country codes into countrycode's "region" destination.
# Countries whose code cannot be translated get NA and are removed below.
World$wb <- countrycode::countrycode(
  sourcevar = World$iso_a3,
  origin = "iso3c",
  destination = "region"
)
World <- dplyr::filter(World, !is.na(wb))

# "view" mode renders an interactive map.
tmap_mode("view")
tm_shape(World) +
  tm_polygons("wb", palette = pal)
#
# ggplot(data = World) +
# geom_sf(aes(fill=wb),color="white",size=.1)+
# theme(legend.position = "none",x.axis.text="none")+
# scale_fill_tableau()+
# theme_igray()
# tmap_mode("plot")
#
# tm_shape(World) +
# tm_polygons("wb",palette=pal)
library(WDI)
library(dplyr)
library(tidyr)
library(sf)
library(rnaturalearth)
library(janitor)
library(ggplot2)
library(gganimate)
library(gifski)
library(ggimage)
library(av)
library(ggthemes)
# Download GDP per country and year from the World Bank and attach the
# World Bank region of every country.
start_year <- 1960
end_year <- 2021

# NY.GDP.MKTP.CD is the World Bank indicator "GDP (current US$)".
worldbank_data <- WDI(
  "NY.GDP.MKTP.CD",
  country = "all",
  start = start_year,
  end = end_year
) %>%
  rename(gdp = `NY.GDP.MKTP.CD`)

# Natural Earth country polygons (without Antarctica); only the ISO2 code and
# the World Bank region are used for the lookup below.
world_regions <- ne_countries(scale = "medium", returnclass = "sf") %>%
  filter(sovereignt != "Antarctica")

region_lookup <- st_drop_geometry(world_regions[c("iso_a2", "region_wb")])

# Natural Earth has shipped the placeholder "-99" instead of the ISO2 code
# for some countries (e.g. France and Norway). Those rows get no region from
# the join and would be removed by drop_na(), silently dropping large
# economies from the ranking. countrycode is therefore used as a fallback for
# codes the join could not resolve. World Bank aggregates (e.g. "World") are
# not expected to have valid ISO2 codes, so they should still end up without
# a region and be dropped — NOTE(review): confirm no aggregate slips through.
gdp_ready <- worldbank_data %>%
  left_join(region_lookup, by = c("iso2c" = "iso_a2")) %>%
  mutate(
    Region = coalesce(
      as.character(region_wb),
      countrycode::countrycode(
        iso2c,
        origin = "iso2c",
        destination = "region",
        warn = FALSE
      )
    )
  ) %>%
  select(-region_wb) %>%
  # Removes rows with a missing value in any column (e.g. missing GDP or
  # missing region).
  drop_na()
# Number of countries kept per year in the bar chart race.
rank_top <- 20

# Rank the countries by GDP within every year (rank 1 = largest GDP) and keep
# only the `rank_top` best-ranked rows of each year.
gdp_set <- gdp_ready %>%
  group_by(year) %>%
  mutate(
    rank = rank(-gdp),
    # GDP relative to the rank-1 country of the same year.
    gdp_rel = gdp / gdp[rank == 1],
    # Bar label: GDP divided by 1e9 and rounded, with a leading space.
    gdp_lbl = paste0(" ", round(gdp / 1e9))
  ) %>%
  ungroup() %>%
  filter(rank <= rank_top)

# Ranking of the year 2019 only.
gdp_set_2019 <- filter(gdp_set, year == 2019)
# Static version of the bar chart race: horizontal bars (through coord_flip),
# one bar per country and year, filled by region. The animation step later
# walks through the years.
static_plot <- ggplot(gdp_set, aes(rank, group = country)) +
  # Bars are tiles centred at gdp / 2 with height gdp, i.e. they span 0..gdp.
  geom_tile(
    aes(y = gdp / 2, height = gdp, fill = Region, width = 0.9),
    alpha = 0.8,
    color = NA
  ) +
  # Country name and flag are placed at fixed negative y positions, i.e. on
  # the side of the bar origin.
  geom_text(
    aes(y = -2e12, label = paste(country)),
    vjust = 0.2,
    hjust = 1,
    size = 7
  ) +
  geom_flag(aes(y = -7e11, image = iso2c)) +
  # Value label at the end of each bar.
  geom_text(aes(y = gdp, label = gdp_lbl, hjust = 0), size = 8) +
  # Large, semi-transparent year stamp.
  geom_text(
    aes(x = rank_top, y = max(gdp), label = as.factor(year)),
    vjust = 0.2,
    alpha = 0.5,
    col = "gray",
    size = 20
  ) +
  scale_fill_tableau(
    guide = guide_legend(
      title.theme = element_text(size = 30),
      label.theme = element_text(size = 25)
    )
  ) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  # clip = "off" lets the names and flags be drawn outside the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  # NOTE(review): newer ggplot2 releases deprecate `size` in element_line()
  # in favour of `linewidth` — switch once the installed version is confirmed.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(size = .1, color = "grey"),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    plot.title = element_text(
      size = 25, hjust = 0, face = "bold", colour = "black", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 18, hjust = 1, face = "italic", color = "grey"
    ),
    plot.caption = element_text(
      size = 14, hjust = 1, face = "italic", color = "grey"
    ),
    plot.background = element_blank(),
    # Wide left margin (8 cm) leaves room for the names and flags.
    plot.margin = margin(2, 2, 2, 8, "cm")
  )
#static_plot
# Animate the static plot: one state per year, with the value axis following
# the data while the rank axis stays fixed.
animated <- static_plot +
  transition_states(
    year,
    transition_length = 3,
    state_length = 0,
    wrap = FALSE
  ) +
  view_follow(fixed_x = TRUE) +
  ease_aes("linear") +
  enter_fade() +
  exit_fade() +
  labs(
    title = paste("Top", rank_top, "World GDP,", start_year, "-", end_year),
    # The plotted indicator NY.GDP.MKTP.CD is GDP in current US$, not in
    # constant 2010 US$, so the subtitle states current prices.
    subtitle = "GDP in Billions USD (current prices)",
    caption = "Data Source: World Bank Data"
  )

## Render the animated chart to mp4 and save
# 600 frames at 20 frames per second.
anim_save(
  filename = "gdp_barchart_race.mp4",
  animation = animate(
    animated,
    nframes = 600,
    fps = 20,
    width = 1500,
    height = 1000,
    renderer = av_renderer()
  )
)
This section shows an animated chart of the top 20 countries by GDP from 1960 to 2021. The GDP data are imported from the Worldbank through its API using the WDI library. The chart is plotted using ggplot2, animated using gganimate and saved to mp4 using av.
library(altair)
library(reticulate)
#reticulate::py_config()
#altair::check_altair()
#altair::install_altair()
library("sf")
library("rnaturalearth")
library("rnaturalearthdata")
library(tidyverse)
library(countrycode)
library(plotly)
library(broom)
library(purrr)
library(ggthemes)
library(tmap)
library(countrycode)
# Join the World Bank indicators of a single year onto the tmap country
# polygons.

# Reloads the tmap dataset, so changes made to `World` earlier in this file
# (filters, added columns) do not carry over.
data("World")

# NOTE(review): presumably this file provides `worldbank_data` in long format
# (the columns iso2c, country, year, Indicator and Values are used below) and
# thereby replaces the GDP table of the same name created earlier — confirm.
load("worldbankindicators.RData")

# ISO2 codes are the join key towards the indicator data.
World$iso2c <- countrycode(
  sourcevar = World$iso_a3,
  origin = "iso3c",
  destination = "iso2c"
)

# Mean of every indicator per country over all available years.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
averaged_data <- worldbank_data %>%
  group_by(iso2c, country, Indicator) %>%
  summarise(Values = mean(Values, na.rm = TRUE))

# NOTE(review): the data are restricted to the year 2000, while the inflation
# map further down is captioned "2020" — confirm which year is intended.
current_data <- worldbank_data %>%
  filter(year == 2000)

# One row per country polygon and matching indicator row.
joined_data <- World %>%
  left_join(current_data, by = "iso2c")

inflation <- filter(joined_data, Indicator == "Inflation")
unemployment <- filter(joined_data, Indicator == "Unemployment")
# Life expectancy vs GDP per capita, all countries in a single panel; point
# size encodes the population estimate.
# NOTE(review): `joined_data` appears to hold one row per country and
# indicator, so every country may be drawn (and enter the smoother) several
# times — confirm and de-duplicate if so.
joined_data %>%
  filter(
    continent %in% c(
      "Asia", "Africa", "Europe", "South America", "Oceania", "North America"
    )
  ) %>%
  ggplot(aes(x = gdp_cap_est, y = life_exp, size = pop_est)) +
  geom_point(alpha = 0.1) +
  # Observations outside the limits are removed before smoothing.
  xlim(0, 90000) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(se = FALSE) +
  # The former `colour` label was dropped: no colour aesthetic is mapped here.
  labs(
    title = "Life expectancy explained by GDP",
    x = "GDP per capita",
    y = "Life expectancy"
  )
# Life expectancy vs GDP per capita, one panel and one linear fit per
# continent; point size encodes the population estimate.
# NOTE(review): `joined_data` appears to hold one row per country and
# indicator, so every country may be drawn (and fitted) several times —
# confirm and de-duplicate if so.
joined_data %>%
  filter(
    continent %in% c(
      "Asia", "Africa", "Europe", "South America", "Oceania", "North America"
    )
  ) %>%
  ggplot(
    aes(x = gdp_cap_est, y = life_exp, size = pop_est, color = continent)
  ) +
  geom_point(alpha = 0.1) +
  # Observations outside the limits are removed before fitting.
  xlim(0, 65000) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(method = lm, se = FALSE) +
  facet_wrap(~continent) +
  labs(
    title = "Life expectancy and GDP per capita",
    x = "GDP per capita",
    y = "Life expectancy",
    colour = "continent"
  )
# HPI against the logarithm of GDP per capita, one panel per continent; point
# size encodes the population estimate.
# NOTE(review): in tmap's `World` data, HPI is presumably the Happy Planet
# Index rather than a pure happiness score — confirm the axis label.
# NOTE(review): `joined_data` appears to hold one row per country and
# indicator, so every country may be drawn several times — confirm.
filter(
  joined_data,
  continent %in% c(
    "Asia", "Africa", "Europe", "South America", "Oceania", "North America"
  )
) %>%
  ggplot(
    mapping = aes(
      x = log(gdp_cap_est),
      y = HPI,
      size = pop_est,
      color = continent
    )
  ) +
  geom_point(alpha = 0.1) +
  facet_wrap(~continent) +
  labs(
    title = "Happiness and GDP per capita",
    x = "log (GDP per capita)",
    y = "Happiness Index",
    colour = "continent"
  )
# HPI against inequality for all countries with a single linear fit; point
# size encodes the population estimate, colour the population density.
# NOTE(review): `joined_data` appears to hold one row per country and
# indicator, so every country may be drawn (and fitted) several times —
# confirm and de-duplicate if so.
joined_data %>%
  filter(
    continent %in% c(
      "Asia", "Africa", "Europe", "South America", "Oceania", "North America"
    )
  ) %>%
  ggplot(
    aes(x = inequality, y = HPI, size = pop_est, color = pop_est_dens)
  ) +
  geom_point(alpha = 0.1) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(method = lm, se = FALSE) +
  labs(
    title = "Happiness and Inequality",
    x = "Inequality",
    y = "Happiness Index"
  )
# HPI against life expectancy for all countries with a single linear fit;
# point size encodes the population estimate, colour the population density.
# NOTE(review): `joined_data` appears to hold one row per country and
# indicator, so every country may be drawn (and fitted) several times —
# confirm and de-duplicate if so.
joined_data %>%
  filter(
    continent %in% c(
      "Asia", "Africa", "Europe", "South America", "Oceania", "North America"
    )
  ) %>%
  ggplot(
    aes(x = life_exp, y = HPI, size = pop_est, color = pop_est_dens)
  ) +
  geom_point(alpha = 0.1) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(method = lm, se = FALSE) +
  labs(
    title = "Happiness and Life Expectancy",
    x = "Life Expectancy",
    y = "Happiness Index"
  )
# Choropleth map: country polygons filled by the `Values` column of the
# inflation subset.
# NOTE(review): the caption says 2020, while `current_data` is filtered to
# the year 2000 earlier in this file — confirm which year is intended.
inflation %>%
  ggplot() +
  geom_sf(mapping = aes(fill = Values)) +
  colorspace::scale_fill_continuous_sequential(palette = "viridis") +
  labs(caption = "Inflation Map 2020")
# Reshape the joined data from long (one row per country and indicator) to
# wide (one column per indicator).
columns <- c("iso_a3", "country", "Indicator", "Values", "continent")

# Keep only the columns needed and drop the geometry before pivoting.
filtered <- st_drop_geometry(joined_data[columns])

wide_data <- pivot_wider(
  filtered,
  names_from = Indicator,
  values_from = Values
)
# log(Inflation) against the interest rate with a linear fit; point size
# encodes GDP.
# NOTE(review): log() yields NaN for negative inflation values; those
# observations are presumably dropped from the plot — confirm this is
# intended.
wide_data %>%
  ggplot(aes(x = `Interest Rate`, y = log(Inflation), size = GDP)) +
  geom_point(alpha = 0.3) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(method = lm, se = FALSE) +
  # Axis labels now match the mapped aesthetics; they were swapped before
  # (x was labelled "log(Inflation)" and y "Interest Rate").
  labs(
    title = "Inflation and Interest rate",
    x = "Interest Rate",
    y = "log(Inflation)"
  )
# Inflation against unemployment for Europe and North America with a linear
# fit; point size encodes the population.
western <- wide_data %>%
  filter(continent %in% c("Europe", "North America"))

western %>%
  ggplot(aes(x = Unemployment, y = Inflation, size = Population)) +
  geom_point(alpha = 0.3) +
  # FALSE is spelled out because `F` is an ordinary, reassignable variable.
  geom_smooth(method = lm, se = FALSE) +
  labs(
    title = "Inflation and unemployment",
    x = "Unemployment rate",
    y = "Inflation"
  )
In this part we will have a detailed look at Consumer Price Inflation as collected by the Worldbank in the “A Global Database of Inflation” [https://www.worldbank.org/en/research/brief/inflation-database]
Besides Energy Price Inflation and Food Price Inflation we will also have a look at Headline and Core Inflation.
Headline inflation is the raw inflation figure reported through the Consumer Price Index (CPI) that is released monthly by the Bureau of Labor Statistics (BLS). The CPI calculates the cost to purchase a fixed basket of goods to determine how much inflation is occurring in the broad economy. The CPI uses a base year and indexes the current year’s prices, according to the base year’s values.
[https://www.investopedia.com/terms/h/headline-inflation.asp]
Note: When talking about inflation usually Headline inflation is meant.
Core inflation removes the CPI components that can exhibit large amounts of volatility from month to month, which can cause unwanted distortion to the headline figure. The most commonly removed factors are those relating to the costs of food and energy. Food prices can be affected by factors outside of those attributed to the economy, such as environmental shifts that cause issues in the growth of crops. Energy costs, such as oil production, can be affected by forces outside of traditional supply and demand, such as political dissent.
[https://www.investopedia.com/terms/h/headline-inflation.asp]
Below you can find a timeseries plot of the median inflation rates for each inflation index and Worldbank region.
# Faceted, interactive time series of the smoothed inflation rates: one facet
# per inflation series, one line per region, plus a red reference rule.

# NOTE(review): presumably this file provides the data frame `data` with the
# columns date, smooth, region, series and ref used below — confirm.
load("./finetuning/smoothts.RData")

# Clicking a region in the legend highlights its line.
selection <- alt$selection_single(fields = list("region"), bind = "legend")

chart <- alt$Chart()$
  encode(
    x = alt$X("date:T", axis = alt$Axis(title = "Time")),
    y = alt$Y("smooth:Q", axis = alt$Axis(title = "Annual Inflation Rate")),
    strokeWidth = alt$value(3),
    # Legend title typo fixed ("Worlbank" -> "Worldbank").
    color = alt$Color("region:N", legend = alt$Legend(title = "Worldbank Region")),
    tooltip = list("region:N", "date:T"),
    # Lines of regions that are not selected are faded out.
    opacity = alt$condition(selection, alt$value(1), alt$value(0.2))
  )$
  mark_line()$
  interactive()$
  add_selection(selection)

# Horizontal rule at the value of the `ref` column.
rule <- alt$Chart()$
  mark_rule(color = "red")$
  encode(
    y = "ref:Q",
    strokeWidth = alt$value(4)
  )

# Both layers share `data`; the layered chart is faceted by series.
all <- alt$layer(chart, rule, data = data)$
  facet(
    "series",
    columns = 2,
    title = "Yearly Inflation Rates per month by Index and Worldbank Regions"
  )
all
The faceted timeseries plot is extremely dense in information.
The first thing to notice is that Energy Price and Food Price Inflation rates are much more volatile than the others. This especially applies to the Energy Price in the North America region.
In general South Asia seems to have the highest Inflation Rates.
The spike in energy prices caused by recent events is especially visible in the Europe and North America regions. It is much greater than the previous maximum during the global financial crisis around 2008. That crisis caused spikes in inflation in all regions of the world and affected all indicators, which seems to be the case for the current spike as well. Such a clear global trend has not been visible in the data since the financial crisis in 2008.
This also leads to the observation that Headline Inflation Rates appear to be especially tied between five of the seven regions, which are East Asia & Pacific, Europe and Central Asia, North America, Latin America and Caribbean as well as the Middle East. These are also the regions which in general have an inflation rate around two percent, which is supposed to be favourable for economies. The regions South Asia and Sub-Saharan Africa have higher inflation rates and also see spikes which seem not to appear in other regions. This is especially visible in the Core Inflation Rates.
To go into more detail about the relationships we will now have a look at different correlations between the timeseries.
Note! It is “dangerous” to look at correlations of timeseries, as timeseries can have cross-correlation which means they are correlated but with a lag in time. This for example could be the case for energy and food prices where it looks like energy prices rise earlier than food prices. We will still look at the correlations keeping in mind what correlation tells us in this case: A direct linear relationship at the same point in time. The correlations were calculated on the raw grouped data.
# Correlation heatmaps between the inflation series, one facet per region.
#
# Step 1: reshape to wide form — one row per (date, region) and one column
# per series.
m = data%>%
select(region,series,value,date)%>%
pivot_wider(id_cols=c(date,region),names_from=series,values_from=value)
# Step 2: nest the series columns per region, compute their correlation
# matrix on pairwise complete observations, then tidy and unnest the result.
korr = m%>%
select(-date)%>%
nest(data=-c(region))%>%
mutate(korr = map(data,cor,use="pairwise.complete.obs"),
tidied = map(korr, tidy))%>%
unnest(tidied)
# Step 3: rebuild a plain data frame for plotting.
# NOTE(review): this relies on the tidied output exposing a column `x`
# (presumably a matrix column holding the rows of each correlation matrix)
# and on its row order matching unique(data$series) repeated per region —
# confirm against the installed broom version.
t = data.frame(
series = rep(data$series%>%unique,data$region%>%unique%>%length),
region = korr$region,
corr = korr$x
)
# Columns 3 to 6 are renamed to the series names; the fixed range assumes
# exactly four series in the same order as the correlation columns — TODO
# confirm.
colnames(t)[3:6]=data$series%>%unique
# Long form: one row per (series, region, other series) with the correlation
# in `value`.
t_long=t%>%
pivot_longer(!c(series,region))
# Heatmap of the correlations, faceted by region.
alt$Chart(t_long)$mark_rect()$encode(
x='series:N',
y='name:N',
color='value:Q',
tooltip = list("series:N","name:N","value:Q")
)$properties(width=200,height=200)$facet("region:N",columns=4)
From the correlation heatmaps we can see that the correlations are generally positive. Further there are distinguishable patterns for the regions. For East Asia and Pacific the Core Consumer Price seems to be the least correlated with the other inflation rates. The Headline Consumer Price seems to be strongly influenced by the Energy and Food Prices. In general North America and South Asia have “odd” patterns of Inflation when compared to other Regions. In both regions the correlation between Energy and Food Prices is extremely low. The Food Price also seems to have little Influence on the Headline Consumer Price Index. If we go back to the Timeseries plot above we can notice, that North America seems to have extremely low and stable Food Inflation Rates.
For further analysis we will look at a switched way of correlations by switching regions and indices.
# Correlation heatmaps between the regions, one facet per inflation series.
# Same procedure as for the per-region heatmaps, with the roles of `region`
# and `series` swapped.
#
# Step 1: swap the two columns (via the helper column `region2`) and reshape
# to wide form — one row per (date, series) and one column per region. After
# the swap the column called `region` holds the series name.
m = data%>%
select(region,series,value,date)%>%
mutate(region2=region)%>%
mutate(region=series,series=region2)%>%
pivot_wider(id_cols=c(date,region),names_from=series,values_from=value)
# Step 2: per series, correlate the region columns on pairwise complete
# observations, then tidy and unnest the result.
korr = m%>%
select(-date)%>%
nest(data=-c(region))%>%
mutate(korr = map(data,cor,use="pairwise.complete.obs"),
tidied = map(korr, tidy))%>%
unnest(tidied)
# The same swap applied to the full data, used below only for the unique
# names and their counts.
d = data%>%
mutate(region2=region)%>%
mutate(region=series,series=region2)
# Step 3: rebuild a plain data frame for plotting.
# NOTE(review): this relies on the tidied output exposing a column `x`
# (presumably a matrix column holding the rows of each correlation matrix)
# and on its row order matching unique(d$series) repeated per facet —
# confirm against the installed broom version.
t = data.frame(
series = rep(d$series%>%unique,d$region%>%unique%>%length),
region = korr$region,
corr = korr$x
)
# Columns 3 to 9 are renamed to the (swapped) series names, i.e. the regions;
# the fixed range assumes exactly seven regions in the same order as the
# correlation columns — TODO confirm.
colnames(t)[3:9]=d$series%>%unique
# Long form: one row per pair of regions and facet, correlation in `value`.
t_long=t%>%
pivot_longer(!c(series,region))
# Heatmap of the correlations, faceted by the swapped `region` column (the
# inflation series).
alt$Chart(t_long)$mark_rect()$encode(
x='series:N',
alt$Y('name:N', axis=alt$Axis(ticks=FALSE, domain=FALSE)),
color='value:Q',
tooltip = list("series:N","name:N","value:Q")
)$properties(width=300,height=300)$facet("region:N",columns=2)
In this last part we will look into correlation between Price Indices between regions. When we look at the Headline Consumer Price Index we also see, what we saw above when we noticed that South Asia and Sub-Saharan Africa seem to be less correlated with the other regions. This however seems to be the case for North America as well, which we overlooked because it has generally low inflation rates. North America seems to have a rather strong connection to Europe & Central Asia however.
If we next look at Core Consumer Inflation Rates Correlations we see, that these have generally low correlations, which is probably the case because these are not clearly defined and can vary in their calculation depending on the regions.
When we look at Food Prices, we see that the Middle East & North Africa Food Prices are rather correlated with the Food Prices in East Asia, Europe and Latin America. Whereas Europe seems to have stronger correlations with the Middle East & North Africa as well as Latin America.
When looking at energy prices the correlation between East Asia & Pacific, Europe & Central Asia as well as Latin America & Caribbean is once again visible.
The opposite seems to be the case for North America and South Asia as well as North America and Middle East & North Africa which seem to be generally weakly correlated.
We will not go into further detail. An interesting next step would be to have a look into trade data between regions.